## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Introduction

This project came from Dr. Rebecca Silton’s lab in the Psychology Department at Loyola University. Dr. Silton’s lab, unable to collect data due to COVID-19, outsourced data collection to a company called NeuroUX. The goal of these games was to measure cognitive functioning across a number of tasks. The theory behind the use of these games is that depression causes changes in cognitive functioning. The sample came was recruited based on elevated depression scores, and the goal was to assess the effect of a mindfulness intervention on depression. Previous work showed the intervention was effective in decreasing self-reported depression scores, and the current study, outlined in this document, is attempting to use scores on NeuroUX games to examine similar changes over time.

Data Cleaning and merging

The NeuroUX dataset came from six different csv files, all formatted like the one below:

R1<-read.csv("round1_updated.csv")
str(R1)
## 'data.frame':    1525 obs. of  46 variables:
##  $ game_name                   : chr  "color trick 1" "color trick 1" "color trick 1" "color trick 1" ...
##  $ userid                      : chr  "user1" "user1" "user1" "user1" ...
##  $ sessionid                   : chr  "631d4ef0-32a3-4145-8847-d9995b9bf6a3" "631d4ef0-32a3-4145-8847-d9995b9bf6a3" "631d4ef0-32a3-4145-8847-d9995b9bf6a3" "631d4ef0-32a3-4145-8847-d9995b9bf6a3" ...
##  $ event_type                  : chr  "trial" "trial" "trial" "trial" ...
##  $ is_response_correct         : logi  TRUE TRUE TRUE TRUE TRUE NA ...
##  $ trial_timestamp             : chr  "2020-10-08T18:44:32.349Z" "2020-10-08T18:44:33.916Z" "2020-10-08T18:44:35.450Z" "2020-10-08T18:44:29.616Z" ...
##  $ trial_number                : int  7 8 9 5 6 NA 1 2 3 4 ...
##  $ user_response               : chr  "[\"blue\"]" "[\"black\"]" "[\"orange\"]" "[\"red\"]" ...
##  $ correct_response            : chr  "[\"blue\"]" "[\"black\"]" "[\"orange\"]" "[\"red\"]" ...
##  $ response_reaction_time      : num  844 827 577 711 586 ...
##  $ response_timestamp          : chr  "2020-10-08T18:44:33.192Z" "2020-10-08T18:44:34.742Z" "2020-10-08T18:44:36.026Z" "2020-10-08T18:44:30.326Z" ...
##  $ correct_count               : int  7 8 9 5 6 9 1 2 3 4 ...
##  $ incorrect_count             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ mechanic_name               : chr  "meaning to meaning" "meaning to meaning" "meaning to meaning" "meaning to meaning" ...
##  $ total_help_time             : num  0 0 0 0 0 NA 0 0 0 0 ...
##  $ attempted                   : logi  NA NA NA NA NA NA ...
##  $ response_timeout            : logi  NA NA NA NA NA NA ...
##  $ score                       : int  7 8 9 5 6 NA 1 2 3 4 ...
##  $ trial_timeout_duration      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_trials                : int  9 9 9 9 9 9 9 9 9 9 ...
##  $ character_revealed          : logi  NA NA NA NA NA NA ...
##  $ level_character_distractor  : chr  "" "" "" "" ...
##  $ level_character_target      : chr  "" "" "" "" ...
##  $ level_distractor_probability: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ level_max_interval          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ level_min_interval          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ stimuli_delay_time          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ stimuli_type                : chr  "" "" "" "" ...
##  $ fastest_reaction_time       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ median_reaction_time        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ time_left                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ time_up                     : logi  NA NA NA NA NA NA ...
##  $ timer_duration              : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ hand_color                  : chr  "" "" "" "" ...
##  $ moving_direction            : chr  "" "" "" "" ...
##  $ pointing_direction          : chr  "" "" "" "" ...
##  $ average_reaction_time       : num  NA NA NA NA NA 896 NA NA NA NA ...
##  $ options                     : chr  "[{\"color\":\"brown\",\"isanswer\":false,\"text\":\"green\"},{\"color\":\"orange\",\"isanswer\":true,\"text\":\"| __truncated__ "[{\"color\":\"orange\",\"isanswer\":false,\"text\":\"green\"},{\"color\":\"blue\",\"isanswer\":true,\"text\":\""| __truncated__ "[{\"color\":\"brown\",\"isanswer\":true,\"text\":\"orange\"},{\"color\":\"blue\",\"isanswer\":false,\"text\":\""| __truncated__ "[{\"color\":\"blue\",\"isanswer\":false,\"text\":\"yellow\"},{\"color\":\"green\",\"isanswer\":true,\"text\":\""| __truncated__ ...
##  $ question                    : chr  "[{\"color\":\"pink\",\"text\":\"blue\"}]" "[{\"color\":\"pink\",\"text\":\"black\"}]" "[{\"color\":\"green\",\"text\":\"orange\"}]" "[{\"color\":\"blue\",\"text\":\"red\"}]" ...
##  $ question_prompt             : chr  "select the option which has the same meaning as the word in the top box." "select the option which has the same meaning as the word in the top box." "select the option which has the same meaning as the word in the top box." "select the option which has the same meaning as the word in the top box." ...
##  $ level_start_timestamp       : chr  "2020-10-08T18:44:08.297Z" "2020-10-08T18:44:08.297Z" "2020-10-08T18:44:08.297Z" "2020-10-08T18:44:08.297Z" ...
##  $ level_end_timestamp         : chr  "" "" "" "" ...
##  $ level_total_time            : int  NA NA NA NA NA 29958 NA NA NA NA ...
##  $ session_complete            : logi  NA NA NA NA NA NA ...
##  $ session_start_timestamp     : chr  "" "" "" "" ...
##  $ session_end_timestamp       : chr  "" "" "" "" ...

To create the NeuroUX dataset, we had to combine the six datasets, named R1, R2, …R6.

Based on conversations with Dr. Silton, we learned one user had played the game twice. We removed this user.

R5<- R5%>%
  filter(level_start_timestamp != "2021-03-25T18:55:58.767Z")

We also learned the user numbers reset across the two semesters (see the similarities between R1 and R4). We had to create new IDs based on these usernames across semesters.

head(R1$userid)
## [1] "user1" "user1" "user1" "user1" "user1" "user1"
head(R4$userid)
## [1] "user1" "user1" "user1" "user1" "user1" "user1"
R1<-mutate(R1, semester=1, timepoint=1, ID = as.factor(as.numeric(gsub("user", "", userid))+100))
R2<-mutate(R2, semester=1, timepoint=2, ID = as.factor(as.numeric(gsub("user", "", userid))+100))
R3<-mutate(R3, semester=1, timepoint=3, ID = as.factor(as.numeric(gsub("user", "", userid))+100))
R4<-mutate(R4, semester=2, timepoint=1, ID = as.factor(as.numeric(gsub("user", "", userid))+200))
R5<-mutate(R5, semester=2, timepoint=2, ID = as.factor(as.numeric(gsub("user", "", userid))+200))
R6<-mutate(R6, semester=2, timepoint=3, ID = as.factor(as.numeric(gsub("user", "", userid))+200))

#Now unique across timepoints
head(R1$ID)
## [1] 101 101 101 101 101 101
## 19 Levels: 101 102 103 104 105 106 107 108 109 111 112 113 114 115 116 ... 140
head(R4$ID)
## [1] 201 201 201 201 201 201
## Levels: 201 202 203 204 205 206 207 208 209 210 211 212 213 214
#Can combine with new unique IDs
Full<-rbind(R1, R2, R3, R4, R5, R6)

There was also one test user who was in the dataset who we needed to remove. This gave us our final dataset for the NeuroUX data. We can see there are hundreds of rows for each participant, which comes from multiple trials within each game.

Full <- Full%>%
  filter(ID != 140)

nrow(Full)
## [1] 7343
Full%>%
  group_by(ID)%>%
  summarize(n())
## # A tibble: 33 × 2
##    ID    `n()`
##    <fct> <int>
##  1 101     250
##  2 102     279
##  3 103     221
##  4 104     251
##  5 105     173
##  6 106     260
##  7 107     233
##  8 108     255
##  9 109     231
## 10 111     243
## # … with 23 more rows

The next step was cleaning the SMILE dataset, which is where we got our information for the depression measures (PHQ-9). The data came in wide format and needed to be moved to long to correspond with each trial of the gameplay. We were only interested in time one scores for our actual study, but examining changes in depression supports previous findings.

smile <- read.csv("SMILE full data 10 23 21_AARThesis_Deidentified_forNL.csv")

smile2 <- pivot_longer(smile,
                    cols = c(PHQ_T1_total, PHQ_T2_total, PHQ_T3_total),
                    names_to = "Time",
                    values_to = "PHQ.Score")

smile2 <- mutate(smile2, numTime = case_when(Time == "PHQ_T1_total" ~ 1,
                                             Time == "PHQ_T2_total" ~ 2,
                                             Time == "PHQ_T3_total" ~ 3))

smile2 <- select(smile2, StudyID_T1, numTime, PHQ.Score)

#We now have only 3 variables: ID, timepoint, and PHQ
str(smile2)
## tibble [435 × 3] (S3: tbl_df/tbl/data.frame)
##  $ StudyID_T1: int [1:435] 22585442 22585442 22585442 22585444 22585444 22585444 22585436 22585436 22585436 22585450 ...
##  $ numTime   : num [1:435] 1 2 3 1 2 3 1 2 3 1 ...
##  $ PHQ.Score : num [1:435] 16 12 13 12 5 ...

The final dataset to bring in was the key, which told us who was in the control and intervention group and which helped us merge the other two datsets. You can see this dataset is not in the most ideal format. You can also see that fall is listed second with a gap between the lists.

IntKey <- read.csv("NeurUX.Intervention.Group.csv")

head(IntKey)
##             X Study.ID NeurUX.ID  Headspace.ID Intervention.Group
## 1 Spring 2021       NA                                         NA
## 2             22585612     user3 SCILOY-LY3NC4                  1
## 3             22585613    user14 SCILOY-C2APOH                  1
## 4             22585614     user9 SCILOY-8CVNJR                  1
## 5             22585616     user6 SCILOY-AVW0KG                  1
## 6             22585619     user7 SCILOY-994GC6                  1
IntKey[15:19,]
##            X Study.ID NeurUX.ID  Headspace.ID Intervention.Group
## 15           22585630    user11 SCILOY-48YPVL                  0
## 16                 NA                                         NA
## 17 Fall 2020       NA                                         NA
## 18           22585580     user1 SCILOY-139ZCT                  1
## 19           22585581     user7 SCILOY-NDWT0Q                  1

We can fix this by pulling information from the column “Study.ID.” We can create semester variables based on this, which will allow us to create the same ID variable as we did in the NeuroUX dataset. In his mutate() call, we also create a factor version of the intervention variable.

sem1 <- IntKey[18:36, 2]
sem2 <- IntKey[2:15, 2]

IntKey <- mutate(IntKey, semester = case_when(Study.ID %in% sem1 ~ 1,
                                               Study.ID %in% sem2 ~ 2),
                          Int.Fac = factor(Intervention.Group,
                                           levels = c(0, 1),
                                           labels = c("Control", "Intervention")))

#From there, we use similar code as what we did in the NeuroUX dataset
IntKey1 <-filter(IntKey, semester == 1)
IntKey1 <- mutate(IntKey1, ID = as.factor(as.numeric(gsub("user", "", NeurUX.ID))+100))

IntKey2 <-filter(IntKey, semester == 2)
IntKey2 <- mutate(IntKey2, ID = as.factor(as.numeric(gsub("user", "", NeurUX.ID))+200))

IntKey <-rbind(IntKey1, IntKey2)

We can now start the merging process. First, we merge the keys with NeuroUX

Merge.1 <- merge(
                 x = Full,
                 y = IntKey,
                 by.x = "ID",
                 by.y = "ID"
)

We then merge that dataset with the PHQ data

Merge.2 <- merge(
                 x = Merge.1,
                 y = smile2,
                 by.x = c("Study.ID", "timepoint"),
                 by.y = c("StudyID_T1", "numTime")
)

With the new merged data, we can now finalize some data cleaning. We first want to look at what’s contained in the NeuroUX data.

levels(as.factor(Merge.2$game_name))
## [1] "color trick 1"     "color trick 2"     "color trick 3"    
## [4] "hand swype"        "playlist"          "quick tap level 2"

We see there are five different games in the dataset as well as a “playlist” variable

We don’t get much from playlist except for information about completion of the full trial. This can be pulled out from the larger dataset.

comp.data <- filter(Merge.2, game_name == "playlist")
COMP <- c("ID", "timepoint", "session_complete", "session_start_timestamp", "session_end_timestamp")
comp.data <- comp.data[COMP]
comp.data <- filter(comp.data, session_complete == TRUE)

#We have 87 total timepoints with completed data

sum(comp.data$session_complete, na.rm = TRUE)
## [1] 87

Can save the number of completed timepoints for each person

timepoints <- comp.data%>%
              group_by(ID)%>%
              summarize(n())
timepoints
## # A tibble: 33 × 2
##    ID    `n()`
##    <fct> <int>
##  1 101       3
##  2 102       3
##  3 103       3
##  4 104       3
##  5 105       2
##  6 106       3
##  7 107       3
##  8 108       3
##  9 109       3
## 10 111       3
## # … with 23 more rows

We aren’t using this for this study, but it could be useful for cross-checking datasets or confirming with Dr. Silton.

#Removing playlist from future analysis
Merge.2 <- filter(Merge.2, game_name != "playlist")

Reaction times are a potential outcome for this study. We first want to make sure the reaction times make sense. We start by doing this for the Color Trick game, which involves quickly selecting identifying either the content or font color of words like “blue”, “red”, etc. Participants played the color trick game three times.

##   tot.over.5 prop.over.5 tot.eq.0 prop.eq.0 total
## 1         30  0.01149425        0         0  2610

A small proportion of trials take over five seconds (not likely due to thinking about or processing the question). Only 30 of the 2610 responses fall into this category. We should likely drop these for calculation and chose to moving forward.

We then do this for Hand Swype, a game that shows a moving finger that points to the direction users should swipe their finger across the screen. Users get bonus time for correctly swiping, penalties for incorrectly swiping, and play until time runs out.

##   tot.over.5 prop.over.5 tot.eq.0  prop.eq.0 total
## 1         35  0.01118211       87 0.02779553  3130

Similarly, it was rare to take longer than 5 seconds to respond to hand swype. 35 out of 3130 were greater than 5s. 87 were equal to 0 (this represents the total number of trials, since all ended with a timeout)

Finally, we do this for Quick Tap 2, a game in which you wait for a symbol and either tap the screen or don’t based on what appears. This game tests your ability to quickly process and inhibit a response

##   tot.over.5 prop.over.5 tot.eq.0 prop.eq.0 total
## 1         10 0.007097232      647 0.4591909  1409

For this game, it appears most of the responses should be under one second. Only 10 responses of 1409 were over 1 second. Many responses were 0 (comes from trials in which respondents were supposed to not tap. Not removing these skews averages). 647 were eaual to zero, which comes from the game structure. For the calculation of reaction times, we won’t want to include the 0s.

We can create a dataset with no outliers using the code below:

CTdf <- filter(Merge.2, game_name %in% CT)
CTdf <- CTdf[CTdf$response_reaction_time < 5000,]
nrow(CTdf)
## [1] 2580
#Recall handswype is the one that has the "checkpoint style"
#More trials = more successful on the task
#Our RT is less important here (although there is a correlation between RT and number correct)
HSdf <- filter(Merge.2, game_name =="hand swype")
HSdf <- HSdf[HSdf$response_reaction_time < 5000,]
nrow(HSdf)
## [1] 3095
#Note that for this one, we only want to count RTs of targets (distractors are 0)
QTdf <- filter(Merge.2, game_name =="quick tap level 2")
QTdf <- QTdf[QTdf$response_reaction_time < 1000,]
QTdf <- QTdf[QTdf$response_reaction_time > 0,]
nrow(QTdf)
## [1] 752
#Combining these three
RTno.out <- rbind(CTdf, QTdf, HSdf)
nrow(Merge.2) 
## [1] 7149
nrow(RTno.out)
## [1] 6427
nrow(Merge.2)-nrow(RTno.out) #Should have removed 75 missing responses and 647 0s from QT
## [1] 722

The plot below shows the new reaction times across games. There are still some outliers, but the range is now a bit less variable.

We can calculate averages using the code below:

RTs <- RTno.out%>%
       group_by(ID, timepoint, game_name)%>%
       summarize(avgRT = mean(response_reaction_time, na.rm = TRUE))
## `summarise()` has grouped output by 'ID', 'timepoint'. You can override using the `.groups` argument.
RTs
## # A tibble: 437 × 4
## # Groups:   ID, timepoint [89]
##    ID    timepoint game_name         avgRT
##    <fct>     <dbl> <chr>             <dbl>
##  1 101           1 color trick 1      896.
##  2 101           1 color trick 2      979.
##  3 101           1 color trick 3     1056.
##  4 101           1 hand swype        1655.
##  5 101           1 quick tap level 2  463.
##  6 101           2 color trick 1      711.
##  7 101           2 color trick 2      819.
##  8 101           2 color trick 3     1008.
##  9 101           2 hand swype        1796.
## 10 101           2 quick tap level 2  428.
## # … with 427 more rows

We next want to look at the number correct for each trial. This is a second option for measuring success in the games (and therefore cognitive processing)

Correct <- Merge.2%>%
           group_by(ID, timepoint, game_name)%>%
           summarize(tot.correct   = sum(is_response_correct == TRUE, na.rm = TRUE),
                     tot.incorrect = sum(is_response_correct == FALSE, na.rm = TRUE),
                     prop.correct = tot.correct/(tot.correct + tot.incorrect))
## `summarise()` has grouped output by 'ID', 'timepoint'. You can override using the `.groups` argument.

We see there are some ceiling effects here. Many people got the majority, if not all, of the questions correct. We will still use these, but we have to keep this in mind.

Game_Data <- merge(
  x = Correct,
  y = RTs,
  by.x = c("ID", "timepoint", "game_name"),
  by.y = c("ID", "timepoint", "game_name")
)

str(Game_Data)
## 'data.frame':    436 obs. of  7 variables:
##  $ ID           : Factor w/ 33 levels "101","102","103",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ timepoint    : num  1 1 1 1 1 2 2 2 2 2 ...
##  $ game_name    : chr  "color trick 1" "color trick 2" "color trick 3" "hand swype" ...
##  $ tot.correct  : int  9 9 9 30 13 9 9 9 32 15 ...
##  $ tot.incorrect: int  0 0 0 5 2 0 0 0 1 0 ...
##  $ prop.correct : num  1 1 1 0.857 0.867 ...
##  $ avgRT        : num  896 979 1056 1655 463 ...

Can merge this game data with the original smile data using the code below

Game.Merge <- merge(
  x = Game_Data,
  y = IntKey,
  by.x = "ID",
  by.y = "ID"
)

PHQ.Corrs <- merge(
  x = Game.Merge,
  y = smile2,
  by.x = c("Study.ID", "timepoint"),
  by.y = c("StudyID_T1", "numTime"))

str(PHQ.Corrs)
## 'data.frame':    436 obs. of  15 variables:
##  $ Study.ID          : int  22585580 22585580 22585580 22585580 22585580 22585580 22585580 22585580 22585580 22585580 ...
##  $ timepoint         : num  1 1 1 1 1 2 2 2 2 2 ...
##  $ ID                : Factor w/ 33 levels "101","102","103",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ game_name         : chr  "color trick 1" "color trick 2" "color trick 3" "hand swype" ...
##  $ tot.correct       : int  9 9 9 30 13 9 9 9 32 15 ...
##  $ tot.incorrect     : int  0 0 0 5 2 0 0 0 1 0 ...
##  $ prop.correct      : num  1 1 1 0.857 0.867 ...
##  $ avgRT             : num  896 979 1056 1655 463 ...
##  $ X                 : chr  "" "" "" "" ...
##  $ NeurUX.ID         : chr  "user1" "user1" "user1" "user1" ...
##  $ Headspace.ID      : chr  "SCILOY-139ZCT" "SCILOY-139ZCT" "SCILOY-139ZCT" "SCILOY-139ZCT" ...
##  $ Intervention.Group: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ semester          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Int.Fac           : Factor w/ 2 levels "Control","Intervention": 2 2 2 2 2 2 2 2 2 2 ...
##  $ PHQ.Score         : num  16 16 16 16 16 10 10 10 10 10 ...
write.csv(PHQ.Corrs, "1204.Merge.csv")

Predictor Selection

In this section, we use the dataset from before to look at which variables are associated with depression scores. We also used multiple regression and model selection techniques to add support to the selection from the bivariate correlations.

This first plot above shows the correlation between reaction time and depression scores across all games and timepoints. Broadly, we see reaction time is not a great predictor of depression scores. We also see some counterintuitive relationships (such as in color trick 1, which seems to show faster reaction times are associated with higher levels of depression). There are also differences across time points. Quick tap has different relationships based on the timepoint.

These plots above show the relationship between correct answers and depression scores. We see the ceiling effects and how the relationships are driven by people who didn’t get 100% correct on some games. Hand swype is the only one with a reasonable amount of variability, but there is no association between these scores and depression. Color Trick 3 has an association, but it is counter-intuitive. Quick tap has a potentially significant relationship at time one, but we can go to data-driven approaches to confirm this.

We can look at the correlations of these variables with PHQ scores. We are particularly interested by time one, since these won’t be impacted by the intervention

PHQ.Corrs%>%
  group_by(timepoint, game_name)%>%
  summarize(cors = cor(PHQ.Score, avgRT))
## `summarise()` has grouped output by 'timepoint'. You can override using the `.groups` argument.
## # A tibble: 15 × 3
## # Groups:   timepoint [3]
##    timepoint game_name            cors
##        <dbl> <chr>               <dbl>
##  1         1 color trick 1     -0.344 
##  2         1 color trick 2     -0.206 
##  3         1 color trick 3     -0.203 
##  4         1 hand swype         0.104 
##  5         1 quick tap level 2  0.159 
##  6         2 color trick 1     -0.193 
##  7         2 color trick 2     -0.0939
##  8         2 color trick 3     -0.136 
##  9         2 hand swype         0.0749
## 10         2 quick tap level 2 -0.263 
## 11         3 color trick 1     -0.0685
## 12         3 color trick 2     -0.310 
## 13         3 color trick 3     -0.259 
## 14         3 hand swype         0.257 
## 15         3 quick tap level 2  0.352
Time1Corrs <- filter(PHQ.Corrs, timepoint == 1)
Time2Corrs <- filter(PHQ.Corrs, timepoint == 2)
Time3Corrs <- filter(PHQ.Corrs, timepoint == 3)

Time1Corrs.Wide <- pivot_wider(Time1Corrs,
                              id_cols = c(Study.ID, ID, NeurUX.ID, timepoint,
                                          Headspace.ID, Intervention.Group, semester, Int.Fac,
                                          PHQ.Score),
                              names_from = c(game_name),
                              values_from = c(tot.correct, tot.incorrect, prop.correct,
                                              avgRT))
library(corrplot)
T1s <- data.frame(Time1Corrs.Wide[9:29])
T1s <- mutate_if(T1s, is.integer, as.numeric)

T1 <- cor(T1s)
#We can look at the full correlations, but all we care about is PHQ
corrplot(T1)

We can look at correlations with PHQ across measures and see which emerge with larger correlation coefficients

T1.Corrs <- data.frame(rbind(rownames(T1), T1[1:21]))
T1.Corrs <- data.frame(t(T1.Corrs))
T1.Corrs <- mutate(T1.Corrs, Corr = as.numeric(X2))

#Correct answers on quick tap are negatively associated with PHQ
#Incorrect answers on CT3 and CT1 reaction time are also negatively correlated (less so)
head(T1.Corrs[order(T1.Corrs$Corr),])
##                                 X1                 X2       Corr
## X6   tot.correct_quick.tap.level.2 -0.540560695789421 -0.5405607
## X16 prop.correct_quick.tap.level.2 -0.540560695789421 -0.5405607
## X9     tot.incorrect_color.trick.3 -0.374302470027265 -0.3743025
## X17            avgRT_color.trick.1  -0.34363839800115 -0.3436384
## X18            avgRT_color.trick.2  -0.20588258929485 -0.2058826
## X19            avgRT_color.trick.3 -0.203493084970735 -0.2034931
#Incorrect quick tap answers are positively correlated with PHQ
#Correct answers on CT3 are similarly positvely correlated (less so)
head(T1.Corrs[order(-T1.Corrs$Corr),])
##                                  X1                X2      Corr
## X1                        PHQ.Score                 1 1.0000000
## X11 tot.incorrect_quick.tap.level.2 0.540560695789421 0.5405607
## X4        tot.correct_color.trick.3 0.374302470027265 0.3743025
## X14      prop.correct_color.trick.3 0.374302470027265 0.3743025
## X21         avgRT_quick.tap.level.2 0.159449315142514 0.1594493
## X3        tot.correct_color.trick.2 0.152402459522374 0.1524025

These results seem to suggest answering correctly on Quick Tap and missing questions on the most difficult Color Trick game are the best predictors of depression scores. Reaction times are not likely great predictors of depression scores. If any, we could look at reaction times from Color Trick 1. The color trick results are all counter-intuitive.

library(ggpubr)
#Negative Correlations
ggarrange(neg1, neg2, neg3, neg4,
          labels = c("QT Total Correct", "QT Proportion Correct",
                     "CT3 Total Incorrect", "CT1 Reaction Time"))

#Positive Correlations
ggarrange(pos1, 
          ggarrange(pos2,
                    pos3,
                    ncol = 2,
                    labels = c("CT3 Total Correct", "CT3 Proportion Correct")),
          nrow = 2, labels = "QT Total Incorrect")

These plots show Quick Tap is likely our only good proxy for depression on these games. This game requires seeing an image, processing it, and deciding whether to tap or inhibit a response based on that image.

Next, we used model selection techniques in a multiple regression model as well.

#Note that the model has singularity problems because high level of overlap
#For example, correct and incorrect obviously are not independent.
lmod <- lm(PHQ.Score ~ ., data = T1s)
summary(lmod)
## 
## Call:
## lm(formula = PHQ.Score ~ ., data = T1s)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.4379  -2.0941   0.5012   1.7280   7.0671 
## 
## Coefficients: (8 not defined because of singularities)
##                                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                     25.2400500 27.5292967   0.917  0.37135   
## tot.correct_color.trick.1        0.1652423  1.9025915   0.087  0.93175   
## tot.correct_color.trick.2        0.0893988  1.3674096   0.065  0.94859   
## tot.correct_color.trick.3        1.5246408  0.6326337   2.410  0.02687 * 
## tot.correct_hand.swype          -0.0378204  0.1648541  -0.229  0.82113   
## tot.correct_quick.tap.level.2   -2.7394663  0.9503382  -2.883  0.00991 **
## tot.incorrect_color.trick.1             NA         NA      NA       NA   
## tot.incorrect_color.trick.2             NA         NA      NA       NA   
## tot.incorrect_color.trick.3             NA         NA      NA       NA   
## tot.incorrect_hand.swype         0.3251302  0.3447474   0.943  0.35812   
## tot.incorrect_quick.tap.level.2         NA         NA      NA       NA   
## prop.correct_color.trick.1              NA         NA      NA       NA   
## prop.correct_color.trick.2              NA         NA      NA       NA   
## prop.correct_color.trick.3              NA         NA      NA       NA   
## prop.correct_hand.swype          9.4122322 15.5691223   0.605  0.55303   
## prop.correct_quick.tap.level.2          NA         NA      NA       NA   
## avgRT_color.trick.1             -0.0077450  0.0033451  -2.315  0.03260 * 
## avgRT_color.trick.2             -0.0018515  0.0030241  -0.612  0.54802   
## avgRT_color.trick.3              0.0032617  0.0031841   1.024  0.31922   
## avgRT_hand.swype                 0.0004805  0.0043112   0.111  0.91249   
## avgRT_quick.tap.level.2          0.0136089  0.0117068   1.162  0.26022   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.188 on 18 degrees of freedom
## Multiple R-squared:  0.5844, Adjusted R-squared:  0.3073 
## F-statistic: 2.109 on 12 and 18 DF,  p-value: 0.07402
#This call shows us which predictors (1-9) should be included in the model
library(leaps)
AIC <- regsubsets(PHQ.Score ~ ., data = T1s)
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 8 linear dependencies found
## Reordering variables and trying again:
rs <- summary(AIC)
rs$which
##   (Intercept) tot.correct_color.trick.1 tot.correct_color.trick.2
## 1        TRUE                     FALSE                     FALSE
## 2        TRUE                     FALSE                     FALSE
## 3        TRUE                     FALSE                     FALSE
## 4        TRUE                     FALSE                     FALSE
## 5        TRUE                     FALSE                     FALSE
## 6        TRUE                     FALSE                     FALSE
## 7        TRUE                     FALSE                     FALSE
## 8        TRUE                     FALSE                     FALSE
## 9        TRUE                     FALSE                     FALSE
##   tot.correct_color.trick.3 tot.correct_hand.swype
## 1                     FALSE                  FALSE
## 2                     FALSE                  FALSE
## 3                     FALSE                  FALSE
## 4                     FALSE                  FALSE
## 5                      TRUE                  FALSE
## 6                      TRUE                  FALSE
## 7                      TRUE                  FALSE
## 8                     FALSE                  FALSE
## 9                     FALSE                   TRUE
##   tot.correct_quick.tap.level.2 tot.incorrect_color.trick.1
## 1                         FALSE                       FALSE
## 2                         FALSE                       FALSE
## 3                          TRUE                       FALSE
## 4                         FALSE                       FALSE
## 5                         FALSE                       FALSE
## 6                          TRUE                       FALSE
## 7                         FALSE                       FALSE
## 8                         FALSE                       FALSE
## 9                         FALSE                       FALSE
##   tot.incorrect_color.trick.2 tot.incorrect_color.trick.3
## 1                       FALSE                       FALSE
## 2                       FALSE                       FALSE
## 3                       FALSE                       FALSE
## 4                       FALSE                        TRUE
## 5                       FALSE                       FALSE
## 6                       FALSE                       FALSE
## 7                       FALSE                       FALSE
## 8                       FALSE                        TRUE
## 9                       FALSE                        TRUE
##   tot.incorrect_hand.swype tot.incorrect_quick.tap.level.2
## 1                    FALSE                           FALSE
## 2                    FALSE                            TRUE
## 3                    FALSE                           FALSE
## 4                     TRUE                            TRUE
## 5                     TRUE                            TRUE
## 6                     TRUE                           FALSE
## 7                     TRUE                            TRUE
## 8                     TRUE                            TRUE
## 9                     TRUE                            TRUE
##   prop.correct_color.trick.1 prop.correct_color.trick.2
## 1                      FALSE                      FALSE
## 2                      FALSE                      FALSE
## 3                      FALSE                      FALSE
## 4                      FALSE                      FALSE
## 5                      FALSE                      FALSE
## 6                      FALSE                      FALSE
## 7                      FALSE                      FALSE
## 8                      FALSE                      FALSE
## 9                      FALSE                      FALSE
##   prop.correct_color.trick.3 prop.correct_hand.swype
## 1                      FALSE                   FALSE
## 2                      FALSE                   FALSE
## 3                       TRUE                   FALSE
## 4                      FALSE                   FALSE
## 5                      FALSE                   FALSE
## 6                      FALSE                   FALSE
## 7                      FALSE                   FALSE
## 8                      FALSE                    TRUE
## 9                      FALSE                    TRUE
##   prop.correct_quick.tap.level.2 avgRT_color.trick.1 avgRT_color.trick.2
## 1                           TRUE               FALSE               FALSE
## 2                          FALSE                TRUE               FALSE
## 3                          FALSE                TRUE               FALSE
## 4                          FALSE                TRUE               FALSE
## 5                          FALSE                TRUE               FALSE
## 6                          FALSE                TRUE               FALSE
## 7                          FALSE                TRUE                TRUE
## 8                          FALSE                TRUE                TRUE
## 9                          FALSE                TRUE                TRUE
##   avgRT_color.trick.3 avgRT_hand.swype avgRT_quick.tap.level.2
## 1               FALSE            FALSE                   FALSE
## 2               FALSE            FALSE                   FALSE
## 3               FALSE            FALSE                   FALSE
## 4               FALSE            FALSE                   FALSE
## 5               FALSE            FALSE                    TRUE
## 6                TRUE            FALSE                    TRUE
## 7                TRUE            FALSE                    TRUE
## 8                TRUE            FALSE                    TRUE
## 9                TRUE            FALSE                    TRUE
#We can plot it to see the best number of predictors
#We can get an idea of the optimal number of predictors based on the minimum of the plot
AIC2 <- 50*log(rs$rss/50) + (2:10)*2
plot(AIC2 ~ I(1:9), ylab = "AIC", xlab = "# of Predictors")

#We see five predictors are ideal

rs$which[5,]
##                     (Intercept)       tot.correct_color.trick.1 
##                            TRUE                           FALSE 
##       tot.correct_color.trick.2       tot.correct_color.trick.3 
##                           FALSE                            TRUE 
##          tot.correct_hand.swype   tot.correct_quick.tap.level.2 
##                           FALSE                           FALSE 
##     tot.incorrect_color.trick.1     tot.incorrect_color.trick.2 
##                           FALSE                           FALSE 
##     tot.incorrect_color.trick.3        tot.incorrect_hand.swype 
##                           FALSE                            TRUE 
## tot.incorrect_quick.tap.level.2      prop.correct_color.trick.1 
##                            TRUE                           FALSE 
##      prop.correct_color.trick.2      prop.correct_color.trick.3 
##                           FALSE                           FALSE 
##         prop.correct_hand.swype  prop.correct_quick.tap.level.2 
##                           FALSE                           FALSE 
##             avgRT_color.trick.1             avgRT_color.trick.2 
##                            TRUE                           FALSE 
##             avgRT_color.trick.3                avgRT_hand.swype 
##                           FALSE                           FALSE 
##         avgRT_quick.tap.level.2 
##                            TRUE
#The command below shows us the five predictors are:
#CT3 Total Correct
#HS Total Incorrect
#QT Total Incorrect
#QT Reaction Time
#CT1 Reaction Time

#Adjusted R Square corrects for the number of predictors
#In this case, we see six predictors is the best option
plot(2:10, rs$adjr2, ylab = "Adj. R^2", xlab = "# of Parameters")

rs$which[6,]
##                     (Intercept)       tot.correct_color.trick.1 
##                            TRUE                           FALSE 
##       tot.correct_color.trick.2       tot.correct_color.trick.3 
##                           FALSE                            TRUE 
##          tot.correct_hand.swype   tot.correct_quick.tap.level.2 
##                           FALSE                            TRUE 
##     tot.incorrect_color.trick.1     tot.incorrect_color.trick.2 
##                           FALSE                           FALSE 
##     tot.incorrect_color.trick.3        tot.incorrect_hand.swype 
##                           FALSE                            TRUE 
## tot.incorrect_quick.tap.level.2      prop.correct_color.trick.1 
##                           FALSE                           FALSE 
##      prop.correct_color.trick.2      prop.correct_color.trick.3 
##                           FALSE                           FALSE 
##         prop.correct_hand.swype  prop.correct_quick.tap.level.2 
##                           FALSE                           FALSE 
##             avgRT_color.trick.1             avgRT_color.trick.2 
##                            TRUE                           FALSE 
##             avgRT_color.trick.3                avgRT_hand.swype 
##                            TRUE                           FALSE 
##         avgRT_quick.tap.level.2 
##                            TRUE
#The added predictor is reaction time for CT3

#Mallow's CP suggests 3 predictors may be ideal
plot(2:10, rs$cp, ylab = "CP Statistic", xlab = "# of Parameters")

rs$which[3,] #The only three are QT total correct, CT1 reaction time, and CT3 proportion correct
##                     (Intercept)       tot.correct_color.trick.1 
##                            TRUE                           FALSE 
##       tot.correct_color.trick.2       tot.correct_color.trick.3 
##                           FALSE                           FALSE 
##          tot.correct_hand.swype   tot.correct_quick.tap.level.2 
##                           FALSE                            TRUE 
##     tot.incorrect_color.trick.1     tot.incorrect_color.trick.2 
##                           FALSE                           FALSE 
##     tot.incorrect_color.trick.3        tot.incorrect_hand.swype 
##                           FALSE                           FALSE 
## tot.incorrect_quick.tap.level.2      prop.correct_color.trick.1 
##                           FALSE                           FALSE 
##      prop.correct_color.trick.2      prop.correct_color.trick.3 
##                           FALSE                            TRUE 
##         prop.correct_hand.swype  prop.correct_quick.tap.level.2 
##                           FALSE                           FALSE 
##             avgRT_color.trick.1             avgRT_color.trick.2 
##                            TRUE                           FALSE 
##             avgRT_color.trick.3                avgRT_hand.swype 
##                           FALSE                           FALSE 
##         avgRT_quick.tap.level.2 
##                           FALSE
lmodAIC <- lm(PHQ.Score ~ tot.correct_color.trick.3 +
                          tot.incorrect_hand.swype +
                          tot.incorrect_quick.tap.level.2 +
                          avgRT_quick.tap.level.2 +
                          avgRT_color.trick.1 , data = T1s)

summary(lmodAIC)
## 
## Call:
## lm(formula = PHQ.Score ~ tot.correct_color.trick.3 + tot.incorrect_hand.swype + 
##     tot.incorrect_quick.tap.level.2 + avgRT_quick.tap.level.2 + 
##     avgRT_color.trick.1, data = T1s)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3658  -2.0774   0.3457   2.3415   6.3311 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                     -0.155238   6.855908  -0.023  0.98211   
## tot.correct_color.trick.3        1.237342   0.510252   2.425  0.02287 * 
## tot.incorrect_hand.swype         0.174400   0.107517   1.622  0.11733   
## tot.incorrect_quick.tap.level.2  2.633518   0.791739   3.326  0.00272 **
## avgRT_quick.tap.level.2          0.012398   0.008744   1.418  0.16856   
## avgRT_color.trick.1             -0.007499   0.002678  -2.800  0.00972 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.696 on 25 degrees of freedom
## Multiple R-squared:  0.5506, Adjusted R-squared:  0.4607 
## F-statistic: 6.126 on 5 and 25 DF,  p-value: 0.000779
#We see that missing questions on quick tap is associated with depression
#Slower reaction times on color trick 1 are associated with more depression
#More correct questions on color trick 3 are asssociated with more depression

lmodRsq <- lm(PHQ.Score ~ tot.correct_color.trick.3 +
                tot.incorrect_hand.swype +
                tot.incorrect_quick.tap.level.2 +
                avgRT_quick.tap.level.2 +
                avgRT_color.trick.1 +
                avgRT_color.trick.3, data = T1s)

summary(lmodRsq)
## 
## Call:
## lm(formula = PHQ.Score ~ tot.correct_color.trick.3 + tot.incorrect_hand.swype + 
##     tot.incorrect_quick.tap.level.2 + avgRT_quick.tap.level.2 + 
##     avgRT_color.trick.1 + avgRT_color.trick.3, data = T1s)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6428  -1.7348   0.6877   2.5948   6.2242 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                     -1.2971599  7.2981109  -0.178  0.86042   
## tot.correct_color.trick.3        1.2587514  0.5195181   2.423  0.02331 * 
## tot.incorrect_hand.swype         0.1550787  0.1153018   1.345  0.19120   
## tot.incorrect_quick.tap.level.2  2.7337683  0.8264724   3.308  0.00296 **
## avgRT_quick.tap.level.2          0.0123326  0.0088756   1.389  0.17744   
## avgRT_color.trick.1             -0.0079576  0.0028587  -2.784  0.01031 * 
## avgRT_color.trick.3              0.0009785  0.0018858   0.519  0.60861   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.751 on 24 degrees of freedom
## Multiple R-squared:  0.5556, Adjusted R-squared:  0.4445 
## F-statistic: 5.001 on 6 and 24 DF,  p-value: 0.001884
#Here, the same predictors are all significant

lmodMallow <- lm(PHQ.Score ~ tot.correct_quick.tap.level.2 +
                             prop.correct_color.trick.3 +
                             avgRT_color.trick.1, data = T1s)

summary(lmodMallow)
## 
## Call:
## lm(formula = PHQ.Score ~ tot.correct_quick.tap.level.2 + prop.correct_color.trick.3 + 
##     avgRT_color.trick.1, data = T1s)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.709  -1.705  -0.306   2.470   6.339 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                   49.034265  13.573190   3.613  0.00122 **
## tot.correct_quick.tap.level.2 -2.482967   0.828985  -2.995  0.00581 **
## prop.correct_color.trick.3     7.092193   4.110101   1.726  0.09586 . 
## avgRT_color.trick.1           -0.007158   0.002803  -2.553  0.01663 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.884 on 27 degrees of freedom
## Multiple R-squared:  0.4639, Adjusted R-squared:  0.4043 
## F-statistic: 7.788 on 3 and 27 DF,  p-value: 0.0006667
#Total correct on quick tap was negatively associated with depression
#Slower reaction times on color trick one were also negatively associated with depression
#The 

Quick Tap Exploratory Data Analysis

With Quick Tap correct questions established as a good proxy for depression scores, we can see how our mindfulness intervention impacts changes in this outcome over time.

QT <- filter(PHQ.Corrs, game_name == "quick tap level 2")

str(QT)
## 'data.frame':    88 obs. of  15 variables:
##  $ Study.ID          : int  22585580 22585580 22585580 22585581 22585581 22585581 22585582 22585582 22585582 22585583 ...
##  $ timepoint         : num  1 2 3 1 2 3 1 2 3 2 ...
##  $ ID                : Factor w/ 33 levels "101","102","103",..: 1 1 1 7 7 7 11 11 11 14 ...
##  $ game_name         : chr  "quick tap level 2" "quick tap level 2" "quick tap level 2" "quick tap level 2" ...
##  $ tot.correct       : int  13 15 14 15 15 15 14 15 15 14 ...
##  $ tot.incorrect     : int  2 0 1 0 0 0 1 0 0 1 ...
##  $ prop.correct      : num  0.867 1 0.933 1 1 ...
##  $ avgRT             : num  463 428 421 488 447 ...
##  $ X                 : chr  "" "" "" "" ...
##  $ NeurUX.ID         : chr  "user1" "user1" "user1" "user7" ...
##  $ Headspace.ID      : chr  "SCILOY-139ZCT" "SCILOY-139ZCT" "SCILOY-139ZCT" "SCILOY-NDWT0Q" ...
##  $ Intervention.Group: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ semester          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Int.Fac           : Factor w/ 2 levels "Control","Intervention": 2 2 2 2 2 2 2 2 2 2 ...
##  $ PHQ.Score         : num  16 10 4 9 9 9 17 18 15 22 ...

This plot shows that people in the control group did worse on this task over time, while the intervention group was stable across timepoints.

QT%>%
  ggplot(aes(x = timepoint, y = tot.correct)) +
  geom_jitter(alpha = .5, width = .1)+
  geom_smooth(method = "lm", formula = "y ~ x")+
  facet_wrap(.~Int.Fac)

We can also check the individual trajectories in a “spaghetti plot”

QT$ID.Fac <- factor(QT$ID)

QT%>%
  ggplot(aes(x = timepoint, y = tot.correct, color = ID.Fac))+
  geom_point()+
  geom_smooth(se = FALSE,
              method = "lm",
              formula = "y ~ x",
              size = .5)+
  facet_wrap(.~Int.Fac)+
  theme(legend.position = "none")

Finally, we can look at individual trajectories over time. We see further evidennce that the effect of the intervention is not consistent across all participants.

QT%>%
  filter(Int.Fac == "Control")%>%
  ggplot(aes(x = timepoint, y = tot.correct))+
  geom_point()+
  geom_smooth(se = FALSE,
              method = "lm",
              formula = "y ~ x",
              lty = "dashed",
              size = .5)+
  facet_wrap(.~ID.Fac)

QT%>%
  filter(Int.Fac == "Intervention")%>%
  ggplot(aes(x = timepoint, y = tot.correct))+
  geom_point()+
  geom_smooth(se = FALSE,
              method = "lm",
              formula = "y ~ x",
              lty = "dashed",
              size = .5)+
  facet_wrap(.~ID.Fac)

Quick Tap Statistical Analysis

Need to add later